library(gridExtra)
library(dplyr)
library(viridis)
library(ggmap)
library(plyr)
library(ggplot2)
library(usmap)
library(leaflet)
library(mapproj)
library(dlookr)
library(stringr)
library(kableExtra)
# Load the built-in USArrests data set and render its first and last six rows
# as a striped table.
# NOTE(review): the caption says "variables", but the table lists rows
# (states) -- confirm the intended wording before changing it.
data("USArrests")
psych::headTail(USArrests, 6, 6) %>%
  kbl(caption = "First and last 6 variables in the data", booktabs = TRUE) %>%
  kable_styling(latex_options = "striped", font_size = 10)
First and last 6 variables in the data
Murder Assault UrbanPop Rape
Alabama 13.2 236 58 21.2
Alaska 10 263 48 44.5
Arizona 8.1 294 80 31
Arkansas 8.8 190 50 19.5
California 9 276 91 40.6
Colorado 7.9 204 78 38.7
… … … … …
Vermont 2.2 48 32 11.2
Virginia 8.5 156 63 20.7
Washington 4 145 73 26.2
West Virginia 5.7 81 39 9.3
Wisconsin 2.6 53 66 10.8
Wyoming 6.8 161 60 15.6
# Print the compact structure of the data frame: its dimensions and, for each
# column, the storage type and the first few values.
str(USArrests)
## 'data.frame':    50 obs. of  4 variables:
##  $ Murder  : num  13.2 10 8.1 8.8 9 7.9 3.3 5.9 15.4 17.4 ...
##  $ Assault : int  236 263 294 190 276 204 110 238 335 211 ...
##  $ UrbanPop: int  58 48 80 50 91 78 77 72 80 60 ...
##  $ Rape    : num  21.2 44.5 31 19.5 40.6 38.7 11.1 15.8 31.9 25.8 ...
# Column-wise preview of the same data frame (row/column counts, then one line
# per column with its type and leading values).
glimpse(USArrests)
## Rows: 50
## Columns: 4
## $ Murder   <dbl> 13.2, 10.0, 8.1, 8.8, 9.0, 7.9, 3.3, 5.9, 15.4, 17.4, 5.3, 2…
## $ Assault  <int> 236, 263, 294, 190, 276, 204, 110, 238, 335, 211, 46, 120, 2…
## $ UrbanPop <int> 58, 48, 80, 50, 91, 78, 77, 72, 80, 60, 83, 54, 83, 65, 57, …
## $ Rape     <dbl> 21.2, 44.5, 31.0, 19.5, 40.6, 38.7, 11.1, 15.8, 31.9, 25.8, …
# Per-column minimum, quartiles, median, mean and maximum.
summary(USArrests)
##      Murder          Assault         UrbanPop          Rape      
##  Min.   : 0.800   Min.   : 45.0   Min.   :32.00   Min.   : 7.30  
##  1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:54.50   1st Qu.:15.07  
##  Median : 7.250   Median :159.0   Median :66.00   Median :20.10  
##  Mean   : 7.788   Mean   :170.8   Mean   :65.54   Mean   :21.23  
##  3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:77.75   3rd Qu.:26.18  
##  Max.   :17.400   Max.   :337.0   Max.   :91.00   Max.   :46.00
# Numeric profile of every column: mean, standard deviation, variation
# coefficient, percentiles, skewness, kurtosis, IQR and the 98%/80% ranges.
library(funModeling)
USArrests %>%
  profiling_num()
##   variable    mean   std_dev variation_coef   p_01   p_05    p_25   p_50
## 1   Murder   7.788  4.355510      0.5592591  1.437  2.145   4.075   7.25
## 2  Assault 170.760 83.337661      0.4880397 45.490 50.250 109.000 159.00
## 3 UrbanPop  65.540 14.474763      0.2208539 35.430 44.000  54.500  66.00
## 4     Rape  21.232  9.366385      0.4411447  7.545  8.750  15.075  20.10
##      p_75    p_95    p_99   skewness kurtosis     iqr        range_98
## 1  11.250  15.400  16.763  0.3820378 2.135329   7.175 [1.437, 16.763]
## 2 249.000 297.300 336.020  0.2273179 1.930980 140.000 [45.49, 336.02]
## 3  77.750  86.550  90.020 -0.2191719 2.215790  23.250  [35.43, 90.02]
## 4  26.175  39.745  45.265  0.7769613 3.201898  11.100 [7.545, 45.265]
##        range_80
## 1 [2.56, 13.32]
## 2 [56.9, 279.6]
## 3    [45, 83.2]
## 4 [10.67, 32.4]
# Data-quality diagnostics for USArrests.
library(naniar)

# Missing-value map across all cells.
USArrests %>%
  vis_miss()

# Outlier diagnostics for the data frame's columns.
USArrests %>%
  dlookr::plot_outlier()

# Normality diagnostics for each of the four variables.
plot_normality(USArrests, Murder, Assault, UrbanPop, Rape)

# Choropleth of the 1973 murder rate by state.
library(maps)

# The state names are lower-cased to act as the map_id key for geom_map().
crimes <- data.frame(state = tolower(rownames(USArrests)), USArrests)

# Build the state polygons once and reuse them for the map layer and for the
# axis limits, instead of calling map_data("state") three times.
states_map <- map_data("state")

gg <- ggplot(crimes, aes(map_id = state, fill = Murder))
gg <- gg + geom_map(map = states_map)
gg <- gg + expand_limits(x = states_map$long, y = states_map$lat)
gg +
  labs(
    title = "Murder rates per 100,000 in 1973",
    x = "longitude",
    y = "latitude"
  ) +
  # scale_fill_distiller() defaults to direction = -1, which would paint the
  # highest murder rates in the lightest shade; direction = 1 makes darker red
  # mean a higher rate.
  scale_fill_distiller(palette = "Reds", limits = c(0, 18), direction = 1)

# Horizontal bar chart of the murder rate for every state, with the value
# printed next to each bar.
ggplot(USArrests, aes(x = rownames(USArrests), y = Murder)) +
  # geom_col() is the direct form of geom_bar(stat = "identity"); there is one
  # bar per state, so no dodging is needed.
  geom_col(color = "darkblue", fill = "darkorange") +
  geom_text(
    aes(label = Murder),
    vjust = 0.5,
    hjust = -0.1,
    color = "black",
    size = 2,
    fontface = "bold"
  ) +
  theme_grey() +
  # The rows are states and the values are rates, not counts per city.
  labs(title = "Murder rate in each state", x = "State") +
  coord_flip()

# Murder vs. assault scatter plot: one repelled, colour-coded label per state
# and a linear fit without a confidence band.
library(ggrepel)
ggplot(USArrests, aes(x = Murder, y = Assault)) +
  geom_point() +
  geom_label_repel(
    aes(
      label = rownames(USArrests),
      color = rownames(USArrests)
    ),
    fontface = "bold"
  ) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Scatter plot of murder and assault for each state") +
  # Every state has its own colour, so the legend is hidden.
  theme(legend.position = "none")

library(ggplot2)
library(ggExtra)

# Scatter plot of urban population against rape with a linear fit (no
# confidence band); the title and subtitle are shifted via vjust.
p1 <- ggplot(USArrests, aes(x = UrbanPop, y = Rape)) +
  geom_point(size = 2, color = "darkred") +
  geom_smooth(method = lm, se = FALSE) +
  labs(
    title = "Scatter plot of Urban population and rape",
    subtitle = "Relationship between population and rape in 1973"
  ) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(vjust = -1),
    plot.subtitle = element_text(vjust = -1.5)
  )

# Add marginal boxplots for both axes around the scatter plot.
ggMarginal(p1, type = "boxplot", size = 7, fill = "slateblue")

library(corrplot)
library(PerformanceAnalytics)

# Spearman rank correlations between the four variables, drawn as a coloured
# matrix ordered by hierarchical clustering with the coefficients overlaid.
res <- cor(USArrests, method = "spearman")
corrplot::corrplot(
  res,
  method = "color",
  order = "hclust",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45
)

library(plotly)

# Interactive overlay of all four variables, one line per variable, with the
# states on the x axis.
# Each trace is named explicitly so the legend does not show auto-generated
# trace labels.
fig <- plot_ly(USArrests, x = ~rownames(USArrests)) %>%
  add_lines(y = ~Rape, name = "Rape") %>%
  add_lines(y = ~Murder, name = "Murder") %>%
  add_lines(y = ~Assault, name = "Assault") %>%
  add_lines(y = ~UrbanPop, name = "UrbanPop") %>%
  layout(
    title = "All of the variables for each state",
    # The x axis holds state names, not dates, so the month/year range
    # selector buttons and the date-typed slider do not apply; a plain range
    # slider is kept for scrolling across states.
    xaxis = list(
      title = "State",
      rangeslider = list(visible = TRUE)
    ),
    yaxis = list(title = "value")
  )

# Display the figure; it was previously built but never shown.
fig
# Interactive scatter plot of assault against murder.
# The trace type and mode are stated explicitly: these are the values plotly
# otherwise infers, and naming them avoids the "No trace type specified" /
# "No scatter mode specifed" messages on every render.
fig1 <- plot_ly(
  USArrests,
  x = ~Murder,
  y = ~Assault,
  type = "scatter",
  mode = "markers",
  marker = list(
    color = "rgba(222,45,38,0.8)",
    line = list(color = "rgb(8,48,107)", width = 1.5)
  )
) %>%
  layout(title = "Relationship between assault and murder")
fig1
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` was deprecated in dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
library(plotly)

# Test figure on mtcars: mpg against cyl drawn as a line trace, with an HTML
# link used as the x-axis title.
data(mtcars)
p <- plot_ly() %>%
  add_trace(
    data = mtcars,
    x = ~cyl,
    y = ~mpg,
    type = "scatter",
    mode = "lines"
  ) %>%
  layout(
    autosize = TRUE,
    title = "Test",
    xaxis = list(
      title = "<a href = 'https://www.nytimes.com/'>The NY TIMES</a>"
    )
  )

# NOTE(review): these are placeholder values and they overwrite whatever is
# already set in the environment. Real credentials should not be committed to
# the script -- presumably they belong in .Renviron; confirm.
Sys.setenv("plotly_username" = "your_plotly_username")
Sys.setenv("plotly_api_key" = "your_api_key")